This sample consists of data for 150 subjects of the original sample of 522 that had completed the initial battery of 37 cognitive tasks, 23 surveys and 3 surveys on demographics. Details of the original sample as well as quality control (qc) procedures are described elsewhere (Eisenberg et al., 2017). Invited participants were chosen randomly and only subsets of them were invited for a given batch (instead of opening the battery to all qualified subjects) with the intention of avoiding potential oversampling of, and bias towards, “high self-regulators”.
# Load the MTurk worker roster and flag workers invited to any retest batch.
workers <- read.csv('/Users/zeynepenkavi/Documents/PoldrackLabLocal/Self_Regulation_Ontology/Data/Retest_09-27-2017/Local/User_717570_workers.csv')
workers <- workers %>%
group_by(Worker.ID) %>%
# A worker counts as a retest worker if any of the five batch flags is set;
# na.rm = TRUE so missing batch flags do not poison the sum. (TRUE, not T,
# since T is reassignable.)
mutate(Retest_worker=ifelse(sum(CURRENT.RetestWorker,CURRENT.RetestWorkerB2,CURRENT.RetestWorkerB3,CURRENT.RetestWorkerB4,CURRENT.RetestWorkerB5,na.rm=TRUE)>0,1,0)) %>%
ungroup()
# Per-worker counts of completed retest tasks (JSON list -> one-column data frame).
worker_counts <- fromJSON('/Users/zeynepenkavi/Documents/PoldrackLabLocal/Self_Regulation_Ontology/Data/Retest_09-27-2017/Local/retest_worker_counts.json')
worker_counts <- as.data.frame(unlist(worker_counts))
names(worker_counts) <- "task_count"
In total 242 participants were invited, 175 began the battery, 157 completed the battery and 150 provided data that passed qc for both time points. Our target sample size was determined in advance of data collection and data collection continued until this number of participants with data that survived qc was reached.
# Completion dates for the first administration come from two subsamples
# (discovery + validation); stack them, then pair each subject's retest
# completion date with their first-battery completion date.
disc_comp_date <- read.csv('/Users/zeynepenkavi/Documents/PoldrackLabLocal/Self_Regulation_Ontology/Data/Retest_09-27-2017/Local/discovery_completion_dates.csv', header = FALSE)
val_comp_date <- read.csv('/Users/zeynepenkavi/Documents/PoldrackLabLocal/Self_Regulation_Ontology/Data/Retest_09-27-2017/Local/validation_completion_dates.csv', header = FALSE)
test_comp_date <- rbind(disc_comp_date, val_comp_date)
rm(disc_comp_date, val_comp_date)
retest_comp_date <- read.csv('/Users/zeynepenkavi/Documents/PoldrackLabLocal/Self_Regulation_Ontology/Data/Retest_09-27-2017/Local/retest_completion_dates.csv', header = FALSE)
# V1 holds the subject identifier in both files.
comp_dates <- merge(retest_comp_date, test_comp_date, by = "V1")
names(comp_dates) <- c("sub_id", "retest_comp", "test_comp")
comp_dates$retest_comp <- as.Date(comp_dates$retest_comp)
comp_dates$test_comp <- as.Date(comp_dates$test_comp)
# Delay between the two administrations (difftime in days).
comp_dates$days_btw <- with(comp_dates, retest_comp - test_comp)
Data collection took place on average 115 days after the completion of the initial battery, with a range of 60 to 228 days.
One of the major contributions of this project is a comprehensive literature review of the retest reliabilities of the surveys and tasks that were used. We reviewed the literature on a measure level (as opposed to task level), paying attention to differences in sample size, the delay between the two measurements, as well as the statistic that was used to assess reliabilities (e.g. Spearman vs. Pearson correlations). Here we present a table and a visualization summarizing our findings. References mentioned in the table below can be found here.
# Literature-review table of published retest reliabilities for our measures.
lit_review <- read.csv('/Users/zeynepenkavi/Dropbox/PoldrackLab/SRO_Retest_Analyses/input/lit_review_figure.csv')
lit_review
Measure level plot
# Tidy the literature-review table for plotting: split dv ("task.variable")
# into task_group and var, order task groups by measure type, flag raw
# (vs model-fit) variables, and prettify labels for display.
lit_review = lit_review %>%
separate(dv, c("task_group", "var"), sep="\\.",remove=FALSE,extra="merge") %>%
# Factor levels ordered by the task column so surveys and tasks group together.
mutate(task_group = factor(task_group, levels = task_group[order(task)]),
raw1_fit0 = grepl('raw', variable_type),
type = as.character(type)) %>%
# Replace underscores with spaces for human-readable axis labels.
mutate(task_group = gsub("_", " ", task_group),
var = gsub("_", " ", var)) %>%
arrange(task_group, raw1_fit0, var) %>%
# Shorten two unwieldy task names.
mutate(task_group = ifelse(task_group == "psychological refractory period two choices", "psychological refractory period", ifelse(task_group == "angling risk task always sunny", "angling risk task",task_group))) %>%
mutate(task_group = gsub("survey", "", task_group)) %>%
select(-measure_description, -reference)
Because this plot is difficult to digest we summarize it on a task level to give a general sense of the main takeaways. This plot naturally disregards much of the fine grained information.
# Measure-level dot plot of published reliabilities for cognitive tasks:
# one row per task group, point size = study sample size, shape = statistic.
p1_t <- lit_review %>%
filter(task == 'task') %>%
ggplot(aes(y = factor(task_group, levels=rev(unique(task_group))), x = retest_reliability)) +
geom_point(aes(size=sample_size, shape = type), color='#00BFC4')+
theme_bw()+
theme(axis.text.y = element_text(size=43),
legend.position = 'none') +
xlab("")+
ylab("")+
scale_x_continuous(limits = c(-0.25,1), breaks=c(-0.25, 0, 0.25, 0.5, 0.75, 1))+
scale_y_discrete(labels = function(x) str_wrap(x, width = 10))+
# NOTE(review): breaks receives the full (duplicated) type vector, not
# unique(sort(...)); this appears to rely on ggplot deduplicating — confirm.
scale_shape_manual(breaks = sort(lit_review$type), values = c(15, 16, 17, 3))+
scale_size_continuous(range = c(5, 50))+
# Red line at zero: points left of it indicate negative reported reliability.
geom_vline(xintercept = 0, color = "red", size = 1)
# Companion panel to p1_t for surveys (different color, same layout).
p2_t <- lit_review %>%
filter(task == 'survey') %>%
ggplot(aes(y = factor(task_group, levels=rev(unique(task_group))), x = retest_reliability)) +
geom_point(aes(size=sample_size, shape = type), color='#F8766D')+
theme_bw()+
theme(axis.text.y = element_text(size=43),
legend.position = 'none') +
xlab("")+
ylab("")+
scale_x_continuous(limits = c(-0.25,1), breaks=c(-0.25, 0, 0.25, 0.5, 0.75, 1))+
scale_y_discrete(labels = function(x) str_wrap(x, width = 10))+
# NOTE(review): same duplicated-breaks pattern as p1_t — confirm intent.
scale_shape_manual(breaks = sort(lit_review$type), values = c(15, 16, 17, 3))+
scale_size_continuous(range = c(5, 50))+
geom_vline(xintercept = 0, color = "red", size = 1)
# Combine the task (left) and survey (right) panels and write one large JPEG.
p3_t <- arrangeGrob(p1_t, p2_t, nrow=1)
ggsave('Lit_Review_Plot_t.jpg', plot = p3_t, device = "jpeg", path = "/Users/zeynepenkavi/Dropbox/PoldrackLab/SRO_Retest_Analyses/output/figures/", width = 27, height = 48, units = "in", limitsize = FALSE)
# Free the plot objects; they are not used again.
rm(p1_t, p2_t, p3_t)
An interactive version of this plot can be found here
Takeaways from this review are: - Survey measures have been reported to have higher reliability compared to task measures
- Survey measures have less variability in the reported reliability estimates compared to task measures
The variables included in this report are:
- meaningful variables (includes only hddm parameters)
- EZ diffusion parameters
- Raw RT and Accuracy measures
- Variables found in the literature (for comparison)
# t1 (first administration) data, restricted to the variables in this report.
test_data <- read.csv('/Users/zeynepenkavi/Documents/PoldrackLabLocal/Self_Regulation_Ontology/Data/Retest_09-27-2017/t1_data/variables_exhaustive.csv')
# drop = FALSE guards against collapsing to a vector if only one column matches.
test_data <- test_data[, names(test_data) %in% retest_report_vars, drop = FALSE]
# X holds the subject identifier; make it character and give it a clear name.
test_data$X <- as.character(test_data$X)
names(test_data)[names(test_data) == 'X'] <- 'sub_id'
For reference, here are the variables that are not included in the analyses of the remainder of this report because they were not of theoretical interest in factor structure analyses of this data so far. These include drift diffusion and other model parameters for specific conditions within a task; survey variables that are not part of the dependent variables for that survey in the literature; and demographics (these are saved for prediction analyses).
# Variables present in the exhaustive t1 file but excluded from this report.
test_data2 <- read.csv('/Users/zeynepenkavi/Documents/PoldrackLabLocal/Self_Regulation_Ontology/Data/Retest_09-27-2017/t1_data/variables_exhaustive.csv')
# Column names are unique, so setdiff gives exactly the excluded set in order.
df <- data.frame(vars = setdiff(names(test_data2), names(test_data)))
df
# t2 (retest) data, restricted to the same report variables as t1.
retest_data <- read.csv('/Users/zeynepenkavi/Documents/PoldrackLabLocal/Self_Regulation_Ontology/Data/Retest_09-27-2017/variables_exhaustive.csv')
# drop = FALSE guards against collapsing to a vector if only one column matches.
retest_data <- retest_data[, names(retest_data) %in% retest_report_vars, drop = FALSE]
retest_data$X <- as.character(retest_data$X)
names(retest_data)[names(retest_data) == 'X'] <- 'sub_id'
# Keep only subjects that also have t1 data.
retest_data <- retest_data[retest_data$sub_id %in% test_data$sub_id, ]
Since HDDM parameters depend on the sample on which they are fit we refit the model on t1 data for the subjects that have t2 data. Here we replace the HDDM parameters in the current t1 dataset with these refitted values.
# HDDM parameters depend on the fitting sample, so t1 HDDM values were refit
# on the retest subjects only; swap them into test_data here.
hddm_refits <- read.csv('/Users/zeynepenkavi/Documents/PoldrackLabLocal/Self_Regulation_Ontology/Data/Retest_03-21-2017/hddm_refits_exhaustive.csv')
hddm_refits <- hddm_refits[, names(hddm_refits) %in% retest_report_vars, drop = FALSE]
hddm_refits$X <- as.character(hddm_refits$X)
names(hddm_refits)[names(hddm_refits) == 'X'] <- 'sub_id'
# Keep sub_id plus every t1 column that is NOT being replaced by a refit,
# then merge the refitted HDDM columns back in. (Replaces the previous
# fragile cbind that produced a column literally named "test_data$sub_id".)
test_data <- merge(test_data[, c('sub_id', setdiff(names(test_data), names(hddm_refits))), drop = FALSE],
                   hddm_refits, by = "sub_id")
For correlations we report Spearman’s \(\rho\)’s only.
Based on Weir (2005) ICC(3,k) does not take in to account within subject differences between two time points (i.e. the fixed effect of time/systematic error). Thus, it is well approximated by Pearson’s r and subject to similar criticisms. Weir (2005) suggests reporting at least this systematic error effect size if one chooses to report with ICC(3,k). Based on his conclusions here I report:
- ICC(3,k): As Dave clarified this ranges from 1 to -1/(number of repeated measures -1) so in our case this range would be [-1, 1]; larger values would mean that the two scores of a subject for a given measure are more similar to each other than they are to scores of other people
- partial \(\eta^2\) for time (\(SS_{time}/SS_{within}\)): effect size of time
- SEM (\(\sqrt(MS_{error})\)): standard error of measurement; the smaller the better
# Names of the numeric (i.e. analyzable) columns of the t1 data.
# Vectorized over columns instead of growing a vector inside a 1:length loop.
numeric_cols <- names(test_data)[vapply(test_data, is.numeric, logical(1))]
# Pair t1 (test) and t2 (retest) scores for one dependent variable.
#   dv_var:     name of the score column shared by t1_df and t2_df
#   merge_var:  subject-identifier column used for matching
#   format:     "long" -> columns sub_id, dv, time, score;
#               "wide" -> one row per subject with columns `1` (t1) and `2` (t2)
#   sample:     'full' uses all subjects; any other value restricts both data
#               frames to the ids in sample_vec (used for bootstrap resamples)
match_t1_t2 <- function(dv_var, t1_df = test_data, t2_df = retest_data, merge_var = 'sub_id', format = "long", sample = 'full', sample_vec){
if(sample == 'full'){
df = merge(t1_df[,c(merge_var, dv_var)], t2_df[,c(merge_var, dv_var)], by = merge_var)
}
else{
df = merge(t1_df[t1_df[,merge_var] %in% sample_vec, c(merge_var, dv_var)], t2_df[t2_df[,merge_var] %in% sample_vec, c(merge_var, dv_var)],
by=merge_var)
}
# merge() suffixes the duplicated score column with .x (t1) and .y (t2);
# recover the time point from the suffix, then strip the final ".suffix"
# from the dv name with the anchored regex.
df = df %>%
na.omit()%>%
gather(dv, score, -sub_id) %>%
mutate(time = ifelse(grepl('\\.x', dv), 1, ifelse(grepl('\\.y', dv), 2, NA))) %>%
separate(dv, c("dv", "drop"), sep='\\.([^.]*)$') %>%
select(-drop)
if(format == 'wide'){
df = df%>% spread(time, score)
}
return(df)
}
# Spearman correlation between t1 and t2 scores for one variable.
# sample/sample_vec are forwarded to match_t1_t2 so the full-sample and
# bootstrap estimates share one code path (previously `df` was undefined
# for any sample value other than 'full'/'bootstrap'). sample_vec is only
# evaluated by match_t1_t2 when sample != 'full', so it may stay missing.
get_spearman <- function(dv_var, t1_df = test_data, t2_df = retest_data, merge_var = 'sub_id', sample = 'full', sample_vec){
  df <- match_t1_t2(dv_var, t1_df = t1_df, t2_df = t2_df, merge_var = merge_var, format = 'wide', sample = sample, sample_vec = sample_vec)
  # Columns `1` and `2` hold the t1 and t2 scores respectively.
  cor(df$`1`, df$`2`, method = 'spearman')
}
# ICC(3,k) ("average fixed raters", Weir 2005) between t1 and t2 scores for
# one variable. sample/sample_vec are forwarded to match_t1_t2 so full-sample
# and bootstrap estimates share one code path.
get_icc <- function(dv_var, t1_df = test_data, t2_df = retest_data, merge_var = 'sub_id', sample = 'full', sample_vec){
  df <- match_t1_t2(dv_var, t1_df = t1_df, t2_df = t2_df, merge_var = merge_var, format = 'wide', sample = sample, sample_vec = sample_vec)
  # psych::ICC expects only the two score columns (`1` = t1, `2` = t2).
  df <- df %>% select(-dv, -sub_id)
  icc <- ICC(df)
  icc$results['Average_fixed_raters', 'ICC']
}
# Pearson correlation between t1 and t2 scores for one variable.
# Same forwarding pattern as get_spearman: sample/sample_vec go straight to
# match_t1_t2, removing the duplicated branches that left `df` undefined for
# unexpected sample values.
get_pearson <- function(dv_var, t1_df = test_data, t2_df = retest_data, merge_var = 'sub_id', sample = 'full', sample_vec){
  df <- match_t1_t2(dv_var, t1_df = t1_df, t2_df = t2_df, merge_var = merge_var, format = 'wide', sample = sample, sample_vec = sample_vec)
  cor(df$`1`, df$`2`, method = 'pearson')
}
# Mean-square decomposition from the ICC ANOVA for one variable.
# Returns a one-row data frame: subs (between-subjects), ind (within-subject /
# time) and resid (residual) mean squares.
get_var_breakdown <- function(dv_var, t1_df = test_data, t2_df = retest_data, merge_var = 'sub_id', sample = 'full', sample_vec){
  df <- match_t1_t2(dv_var, t1_df = t1_df, t2_df = t2_df, merge_var = merge_var, format = 'wide', sample = sample, sample_vec = sample_vec)
  df <- df %>% select(-dv, -sub_id)
  icc <- ICC(df)
  # Rows 1-3 of psych::ICC's aov summary table are assumed to be subjects,
  # time, residual in that order — confirm if the psych version changes.
  data.frame(subs = icc$summary[[1]][1, 'Mean Sq'],
             ind = icc$summary[[1]][2, 'Mean Sq'],
             resid = icc$summary[[1]][3, 'Mean Sq'])
}
# Partial eta^2 for the fixed effect of time, SS_time / (SS_time + SS_error),
# from a repeated-measures ANOVA with subject as the error stratum.
# Uses long-format data from match_t1_t2 (the default format).
get_eta <- function(dv_var, t1_df = test_data, t2_df = retest_data, merge_var = 'sub_id', sample = 'full', sample_vec){
  df <- match_t1_t2(dv_var, t1_df = t1_df, t2_df = t2_df, merge_var = merge_var, sample = sample, sample_vec = sample_vec)
  mod <- summary(aov(score~Error(sub_id)+time, df))
  # In the flattened within-subject stratum, 'Sum Sq1' is the time term and
  # 'Sum Sq2' the error term.
  ss_time <- as.data.frame(unlist(mod$`Error: Within`))['Sum Sq1',]
  ss_error <- as.data.frame(unlist(mod$`Error: Within`))['Sum Sq2',]
  ss_time/(ss_time+ss_error)
}
# Standard error of measurement: sqrt(MS_error) from the same repeated-measures
# ANOVA used in get_eta. Smaller values mean better measurement precision.
get_sem <- function(dv_var, t1_df = test_data, t2_df = retest_data, merge_var = 'sub_id', sample = 'full', sample_vec){
  df <- match_t1_t2(dv_var, t1_df = t1_df, t2_df = t2_df, merge_var = merge_var, sample = sample, sample_vec = sample_vec)
  mod <- summary(aov(score~Error(sub_id)+time, df))
  # 'Mean Sq2' is the error mean square of the within-subject stratum.
  ms_error <- as.data.frame(unlist(mod$`Error: Within`))['Mean Sq2',]
  sqrt(ms_error)
}
# One row of reliability statistics per numeric variable.
rel_df <- data.frame(spearman = rep(NA, length(numeric_cols)),
                     icc = rep(NA, length(numeric_cols)),
                     pearson = rep(NA, length(numeric_cols)),
                     eta_sq = rep(NA, length(numeric_cols)),
                     sem = rep(NA, length(numeric_cols)),
                     var_subs = rep(NA, length(numeric_cols)),
                     var_ind = rep(NA, length(numeric_cols)),
                     var_resid = rep(NA, length(numeric_cols)))
row.names(rel_df) <- numeric_cols
for(dv_i in numeric_cols){
  rel_df[dv_i, 'spearman'] <- get_spearman(dv_i)
  rel_df[dv_i, 'icc'] <- get_icc(dv_i)
  rel_df[dv_i, 'pearson'] <- get_pearson(dv_i)
  rel_df[dv_i, 'eta_sq'] <- get_eta(dv_i)
  rel_df[dv_i, 'sem'] <- get_sem(dv_i)
  # Call get_var_breakdown once per variable instead of three times:
  # each call refits the same ICC ANOVA.
  vb <- get_var_breakdown(dv_i)
  rel_df[dv_i, 'var_subs'] <- vb$subs
  rel_df[dv_i, 'var_ind'] <- vb$ind
  rel_df[dv_i, 'var_resid'] <- vb$resid
}
rel_df$dv <- row.names(rel_df)
rownames(rel_df) <- seq_len(nrow(rel_df))
# Label each variable as task- or survey-derived. holt_laury contains
# "survey" in its name but is a task, so relabel it afterwards.
rel_df$task <- 'task'
rel_df[grep('survey', rel_df$dv), 'task'] <- 'survey'
rel_df[grep('holt', rel_df$dv), 'task'] <- "task"
rel_df <- rel_df %>%
  select(dv, task, spearman, icc, pearson, eta_sq, sem, var_subs, var_ind, var_resid)
Average reliability metrics and the number of variables for both the surveys and the tasks. Both Spearman \(\rho\)’s as well as the ICC’s are higher for surveys.
Systematic differences between the two measurements (\(\eta^2\)) are higher for tasks than surveys (potentially indicating learning effects?).
Unexpectedly, median standard error of measurement is higher for surveys but the distribution below suggests that this is a side effect of having too many task measures with very low SEMs.
# Median reliability metrics and variable counts by measure type (task/survey).
rel_df %>%
group_by(task) %>%
summarise(median_spearman = median(spearman),
median_icc = median(icc),
median_pearson = median(pearson),
median_eta_sq = median(eta_sq),
median_sem = median(sem),
num_vars = n()) %>%
datatable() %>%
formatRound(columns=c('median_spearman', 'median_icc', 'median_pearson','median_eta_sq', 'median_sem'), digits=3)
Table of reliability point estimates
# Full searchable table of per-variable reliability point estimates.
rel_df %>%
datatable(filter='top') %>%
formatRound(columns=c('spearman', 'icc', 'pearson','eta_sq', 'sem'), digits=3)
# Overlaid histograms of the three correlation-type reliability statistics,
# faceted by statistic and colored by measure type (task vs survey).
rel_df %>%
  select(dv, task, spearman, pearson, icc) %>%
  gather(metric, estimate, -dv, -task) %>%
  ggplot(aes(estimate, fill = task)) +
  geom_histogram(alpha = 0.5, position = 'identity') +
  facet_wrap(~metric) +
  theme_bw() +
  theme(legend.position = 'bottom',
        legend.title = element_blank()) +
  xlab("")
Task measures have significantly lower reliability (here checking only on point estimates of ICC).
summary(lm(icc~task, rel_df))
Call:
lm(formula = icc ~ task, data = rel_df)
Residuals:
Min 1Q Median 3Q Max
-0.8362 -0.1217 0.0383 0.1735 0.4252
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.8706 0.0291 29.9 <2e-16 ***
tasktask -0.4105 0.0325 -12.6 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.242 on 344 degrees of freedom
Multiple R-squared: 0.317, Adjusted R-squared: 0.315
F-statistic: 159 on 1 and 344 DF, p-value: <2e-16
# Pairwise scatterplots of the three reliability statistics; the identity
# line marks perfect agreement between two statistics.
p1 = rel_df %>%
ggplot(aes(spearman, icc, col=task))+
geom_point()+
theme_bw()+
theme(legend.title = element_blank(),
legend.position = "none")+
geom_abline(intercept = 0, slope=1)
p2 = rel_df %>%
ggplot(aes(pearson, icc, col=task))+
geom_point()+
theme_bw()+
theme(legend.title = element_blank(),
legend.position = "none")+
geom_abline(intercept = 0, slope=1)
p3 = rel_df %>%
ggplot(aes(pearson, spearman, col=task))+
geom_point()+
theme_bw()+
theme(legend.title = element_blank(),
legend.position = "none")+
geom_abline(intercept = 0, slope=1)
grid.arrange(p1, p2, p3, nrow=1)
Note: Some variables have <0 ICC’s. This would be the case if the \(MS_{error}\)>\(MS_{between}\). Data for these variables have no relationship between the two time points.
Here we summarize the results on a task level to make them more digestible and easier to relate to the literature.
With this in mind we reduce the list of task measures to a list of one per task by averaging only the raw measures from all the trials in a task. We chose to reduce the information in this manner to avoid any bias stemming from differential amount of interest and procedures applied to certain tasks over others (e.g. a task can have over 10 measures because it has multiple conditions and we have chosen to fit DDM’s for specific conditions while another might only have 2 due to our relative inexperience and lack of interest in it). We check whether the number of trials in a task has a significant effect on these average reliabilities of raw measures as well.
We filter out the DDM parameters and measures for specific contrasts. Note that this does leave some tasks with measures that are model fits and/or for specific conditions (because at least the current datasets do not include measures that are based on all the trials and are raw, though I could dive into variables_exhaustive for such measures. For example, the average reliability for Kirby is based on three discount rates for specific conditions.). Here’s the order of tasks by mean reliability sorted for ICC and then Spearman’s \(\rho\).
# Task-level summary: keep (mostly) raw, all-trial measures by excluding
# difference/EZ/hddm labels, then take per-task medians of ICC and Spearman.
tmp = measure_labels %>%
mutate(dv = as.character(dv)) %>%
filter(task == 'task') %>%
left_join(rel_df[,c("dv", "spearman","icc")], by='dv') %>%
# variable_type is a comma-separated label triple; split and trim whitespace.
separate(variable_type, c("label_1", "label_2", "label_3"), sep = ",") %>%
mutate(label_1 = trim(label_1),
label_2=trim(label_2),
label_3=trim(label_3))%>%
filter(label_1 %in% c('difference', 'EZ', 'hddm') ==FALSE & label_2 %in% c('difference', 'EZ', 'hddm') == FALSE) %>%
# Task name is the first dot-separated token of dv.
separate(dv, c('task_name', 'extra_1', 'extra_2'), sep = '\\.',remove=FALSE) %>%
select(-extra_1, -extra_2) %>%
group_by(task_name) %>%
summarise(median_icc = median(icc),
median_spearman = median(spearman),
num_measures = n(),
num_trials = unique(num_all_trials)) %>%
arrange(-median_icc, -median_spearman)
tmp %>%
datatable() %>%
formatRound(columns=c('median_spearman', 'median_icc'), digits=3)
Does number of items in a task have a significant effect on the average ICC of (mostly) raw measures for all trials from a task? No. (no effect on Spearman either)
summary(lm(median_icc ~ num_trials, data = tmp))
Call:
lm(formula = median_icc ~ num_trials, data = tmp)
Residuals:
Min 1Q Median 3Q Max
-0.6123 -0.0680 0.0411 0.1171 0.2961
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.574967 0.046183 12.45 8.3e-14 ***
num_trials 0.000246 0.000235 1.05 0.3
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.193 on 32 degrees of freedom
(3 observations deleted due to missingness)
Multiple R-squared: 0.0333, Adjusted R-squared: 0.00308
F-statistic: 1.1 on 1 and 32 DF, p-value: 0.302
Does number of measures that went in to this reliability estimate have a significant effect on the average ICC of (mostly) raw measures for all trials from a task? No. (same is true for Spearman)
summary(lm(median_icc ~ num_measures, data = tmp))
Call:
lm(formula = median_icc ~ num_measures, data = tmp)
Residuals:
Min 1Q Median 3Q Max
-0.5929 -0.0629 0.0429 0.1234 0.2430
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.7110 0.0670 10.62 2.4e-12 ***
num_measures -0.0395 0.0238 -1.66 0.11
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.186 on 34 degrees of freedom
(1 observation deleted due to missingness)
Multiple R-squared: 0.0753, Adjusted R-squared: 0.0481
F-statistic: 2.77 on 1 and 34 DF, p-value: 0.105
In this section we label the measures based on the procedure behind the creation of a variable. This moves further away from the literature and thinking about our measures in terms of specific tasks, surveys or cognitive processes and is a more novel way of presenting our data.
Potentially prescriptive recommendations coming out of this section would intend to build intuitions about different treatments to data that have higher reliabilities especially when researchers plan on making their own tasks and collecting different kinds of measures.
# Label table restricted to DDM tasks, with reliability point estimates
# attached, for comparing raw vs EZ vs hddm variable types.
tmp = measure_labels %>%
mutate(dv = as.character(dv)) %>%
filter(task == 'task') %>%
left_join(rel_df[,c("dv", "spearman","icc")], by='dv') %>%
separate(variable_type, c("label_1", "label_2", "label_3"), sep = ",") %>%
mutate(label_1 = trim(label_1),
label_2=trim(label_2),
label_3=trim(label_3))%>%
filter(ddm_task == 1) %>%
separate(dv, c('task_name', 'extra_1', 'extra_2'), sep = '\\.', remove=FALSE) %>%
select(-extra_1, -extra_2) %>%
drop_na()
In addition to raw response time and accuracy measures we get from certain tasks we have also chosen to fit drift diffusion models to them. The tasks we have chosen to fit these models are: unique(tmp$task_name). This list does not include all cognitive tasks in the battery.
Table of median reliabilities for the two kinds of DDM models’ (hddm and EZ) parameters compared to raw response times and accuracies depending on whether they were fit to all data or whether they are contrast of two conditions sorted by mean ICC and mean Spearman’s \(\rho\).
# Median reliabilities per label combination (overall/difference x
# raw/EZ/hddm x parameter), sorted by ICC then Spearman.
tmp %>%
group_by(label_1,label_2, label_3) %>%
summarise(median_icc = median(icc),
median_spearman = median(spearman),
n_vars = n()) %>%
arrange(-median_icc, -median_spearman) %>%
datatable() %>%
formatRound(columns=c('median_spearman', 'median_icc'), digits=3)
Plot the distributions of different types of variables
# Boxplots of ICC by variable type (raw / EZ / hddm / other) and parameter,
# faceted by whether the variable is an overall or a difference score.
measure_labels %>%
select(dv, task, variable_type, ddm_task, num_all_trials)%>%
separate(dv, c("task_group", "var"), sep="\\.",remove=FALSE,extra="merge") %>%
mutate(task_group = factor(task_group, levels = task_group[order(task)])) %>%
filter(task == "task") %>%
select(-var, -task) %>%
left_join(rel_df[,c("dv", "icc")], by="dv")%>%
# filter(ddm_task == 1) %>%
separate(variable_type, c("overall_difference", "raw_EZ_hddm", "param"), extra="merge") %>%
# Non-DDM tasks get an "other" bucket so they still appear in the plot.
mutate(raw_EZ_hddm = ifelse(ddm_task == 0, "other", raw_EZ_hddm),
param = ifelse(ddm_task == 0, "other", param)) %>%
na.omit() %>%
filter(overall_difference!='condition') %>%
ggplot(aes(x=factor(raw_EZ_hddm, levels = c("raw", "EZ", "hddm", "other"), labels=c("Raw", "EZ-diffusion", "Hierarchical diffusion", "Other")),y=icc,fill=param))+
geom_boxplot(position = position_dodge(width=0.75))+
facet_wrap(~factor(overall_difference, levels=c("overall", "difference"), labels=c("Overall", "Difference")))+
theme_bw()+
xlab("")+
ylab("ICC")+
theme(legend.title = element_blank())
Are these differences meaningful?
Reliabilities for difference variables is lower.
summary(lm(icc ~ label_1,tmp))
Call:
lm(formula = icc ~ label_1, data = tmp)
Residuals:
Min 1Q Median 3Q Max
-0.9483 -0.1124 0.0239 0.1275 0.4075
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.2240 0.0198 11.3 <2e-16 ***
label_1overall 0.3887 0.0271 14.3 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.195 on 206 degrees of freedom
Multiple R-squared: 0.499, Adjusted R-squared: 0.497
F-statistic: 205 on 1 and 206 DF, p-value: <2e-16
When all trials are used the drift rates and rt’s have higher reliabilities compared to accuracies.
summary(lm(icc~label_3,tmp[tmp$label_1=="overall",]))
Call:
lm(formula = icc ~ label_3, data = tmp[tmp$label_1 == "overall",
])
Residuals:
Min 1Q Median 3Q Max
-0.8525 -0.0790 0.0176 0.0744 0.2787
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.5169 0.0413 12.50 < 2e-16 ***
label_3drift rate 0.1948 0.0494 3.94 0.00015 ***
label_3non-decision 0.0049 0.0494 0.10 0.92122
label_3rt 0.2090 0.0555 3.77 0.00027 ***
label_3threshold 0.0681 0.0494 1.38 0.17119
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.143 on 106 degrees of freedom
Multiple R-squared: 0.276, Adjusted R-squared: 0.249
F-statistic: 10.1 on 4 and 106 DF, p-value: 5.7e-07
Do people differ in how much their scores change depending on how many days it has been since they completed the initial battery?
Make data frame with difference between two scores for each measure for each subject. Since the scores for different measures are on different scales for comparability the difference scores are scaled (i.e. divided by their root mean square) but not centered so a value of 0 for the difference scores would indicate a lack of a difference between the scores of a subject.
# Per-subject change scores (t2 - t1) for every numeric variable. Differences
# are scaled (divided by their RMS) but NOT centered, so 0 still means "no
# change". Results are collected in a preallocated list and bound once,
# instead of growing a data frame with rbind inside the loop.
diff_list <- vector("list", length(numeric_cols))
for (i in seq_along(numeric_cols)) {
  tmp <- match_t1_t2(numeric_cols[i], format = 'wide')
  diff_list[[i]] <- tmp %>%
    mutate(difference = scale(`2` - `1`, center = FALSE))
}
t1_2_difference <- do.call(rbind, diff_list)
# scale() returns a one-column matrix; flatten it to a plain numeric vector.
t1_2_difference$difference <- as.data.frame(t1_2_difference$difference)$V1
Add completion dates to this data frame.
t1_2_difference = merge(t1_2_difference, comp_dates[,c('sub_id', 'days_btw')], by='sub_id')
The effect of number of days in between in the full model where the difference scores are regressed on a fixed effect for measure and days between the two scores and random intercepts for each subject (Should this model include the interaction between the fixed effects?). Since there are over 300 measures the output of the full model is not presented here. Instead below are the coefficient for the fixed effect of days between completion times and its t value.
# Mixed model: change score ~ measure + delay, random intercept per subject.
mod = lmer(difference ~ dv + days_btw + (1|sub_id), data = t1_2_difference)
# Report only the delay coefficient and its t value (full output is too long).
data.frame(estimate=coef(summary(mod))["days_btw","Estimate"], tval=coef(summary(mod))["days_btw","t value"])
For visualization purposes I summarized the difference scores per person by looking at the average difference and plot that against the number of days between completion.
# Average change score per subject, plotted against the test-retest delay
# (interactive via plotly).
tmp = t1_2_difference %>%
group_by(sub_id) %>%
summarise(mean_diff = mean(difference, na.rm=T),
days_btw = unique(days_btw))
ggplotly(tmp %>%
ggplot(aes(days_btw, mean_diff))+
geom_point()+
theme_bw()+
xlab("Days between completion")+
ylab("Mean standardized difference between two time points")+
geom_smooth(method="lm"))
To confirm: the slope of this line is not significant. That is, there doesn’t seem to be a systematic difference between the two measurements depending on the number of days between the two measurements.
summary(lm(mean_diff ~ days_btw, data=tmp))
Call:
lm(formula = mean_diff ~ days_btw, data = tmp)
Residuals:
Min 1Q Median 3Q Max
-0.30604 -0.08129 0.00245 0.06670 0.23174
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.047082 0.033901 -1.39 0.17
days_btw 0.000444 0.000287 1.55 0.12
Residual standard error: 0.103 on 148 degrees of freedom
Multiple R-squared: 0.0159, Adjusted R-squared: 0.00928
F-statistic: 2.4 on 1 and 148 DF, p-value: 0.124
Summarized bootstrapped reliabilities
# Bootstrapped reliability estimates (one row per bootstrap draw per variable).
boot_df <- read.csv('/Users/zeynepenkavi/Dropbox/PoldrackLab/SRO_Retest_Analyses/input/bootstrap_merged.csv')
boot_df = boot_df %>%
drop_na() %>%
# as.character before as.numeric — presumably these columns are read as
# factors and this avoids factor-level coercion; confirm against the CSV.
mutate(icc = as.numeric(as.character(icc)),
spearman = as.numeric(as.character(spearman)),
eta_sq = as.numeric(as.character(eta_sq)),
sem = as.numeric(as.character(sem)))
# Per-variable bootstrap medians and 95% intervals for ICC and Spearman's rho.
# median(x) is identical to quantile(x, probs = 0.5) under the default
# (type 7) quantile definition.
boot_df %>%
  group_by(dv) %>%
  summarise(icc_median = median(icc),
            icc_2.5 = quantile(icc, probs = 0.025),
            icc_97.5 = quantile(icc, probs = 0.975),
            spearman_median = median(spearman),
            spearman_2.5 = quantile(spearman, probs = 0.025),
            spearman_97.5 = quantile(spearman, probs = 0.975)) %>%
  datatable() %>%
  formatRound(columns=c('icc_median', 'icc_2.5', 'icc_97.5', 'spearman_median', 'spearman_2.5', 'spearman_97.5'), digits=3)
Comparison of survey measures to cognitive task measures in the bootstrapped results. Multilevel model with random intercepts for each measure and fixed effect of survey versus cognitive measure. (Same result as seen in point estimates comparison).
# Label bootstrap rows as survey vs task; "holt" (holt_laury_survey) contains
# "survey" in its name but is a task, so it is relabeled second.
boot_df = boot_df %>%
mutate(task = ifelse(grepl("survey",dv), "survey","task"),
task = ifelse(grepl("holt",dv), "task", task))
# Mixed model: fixed effect of measure type, random intercept per measure.
summary(lmerTest::lmer(icc ~ task + (1|dv), boot_df))
Linear mixed model fit by REML t-tests use Satterthwaite approximations
to degrees of freedom [lmerMod]
Formula: icc ~ task + (1 | dv)
Data: boot_df
REML criterion at convergence: -733186
Scaled residuals:
Min 1Q Median 3Q Max
-11.499 -0.399 0.020 0.450 7.476
Random effects:
Groups Name Variance Std.Dev.
dv (Intercept) 0.05756 0.240
Residual 0.00689 0.083
Number of obs: 344000, groups: dv, 344
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.8701 0.0289 344.0000 30.1 <2e-16 ***
tasktask -0.4103 0.0323 344.0000 -12.7 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr)
tasktask -0.894
Checking DDM results in the bootstrapped estimates. Same as before too.
# Rebuild the DDM measure-label table and attach the bootstrap ICC
# distributions (instead of point estimates) for the mixed-model comparisons.
tmp = measure_labels %>%
mutate(dv = as.character(dv)) %>%
filter(task == 'task') %>%
separate(variable_type, c("label_1", "label_2", "label_3"), sep = ",") %>%
mutate(label_1 = trim(label_1),
label_2=trim(label_2),
label_3=trim(label_3))%>%
filter(ddm_task == 1) %>%
separate(dv, c('task_name', 'extra_1', 'extra_2'), sep = '\\.', remove=FALSE) %>%
select(-extra_1, -extra_2) %>%
drop_na() %>%
left_join(boot_df[,c("dv", "icc")], by = 'dv')
# Overall vs difference (contrast) variables, random intercept per measure.
summary(lmerTest::lmer(icc ~ label_1 + (1|dv) ,tmp))
Linear mixed model fit by REML t-tests use Satterthwaite approximations
to degrees of freedom [lmerMod]
Formula: icc ~ label_1 + (1 | dv)
Data: tmp
REML criterion at convergence: -276926
Scaled residuals:
Min 1Q Median 3Q Max
-9.968 -0.462 0.041 0.520 5.572
Random effects:
Groups Name Variance Std.Dev.
dv (Intercept) 0.03714 0.1927
Residual 0.00916 0.0957
Number of obs: 150000, groups: dv, 150
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.2043 0.0218 148.3000 9.36 <2e-16 ***
label_1overall 0.4334 0.0315 148.3000 13.76 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr)
label_1vrll -0.693
summary(lmerTest::lmer(icc ~ label_3 + (1|dv) ,tmp))
Linear mixed model fit by REML t-tests use Satterthwaite approximations
to degrees of freedom [lmerMod]
Formula: icc ~ label_3 + (1 | dv)
Data: tmp
REML criterion at convergence: -276804
Scaled residuals:
Min 1Q Median 3Q Max
-9.968 -0.462 0.040 0.521 5.573
Random effects:
Groups Name Variance Std.Dev.
dv (Intercept) 0.08105 0.2847
Residual 0.00916 0.0957
Number of obs: 150000, groups: dv, 150
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.142 0.127 146.700 1.12 0.266
label_3drift rate 0.335 0.133 146.700 2.52 0.013 *
label_3non-decision 0.265 0.137 146.700 1.94 0.054 .
label_3rt 0.285 0.140 146.700 2.03 0.044 *
label_3threshold 0.197 0.137 146.700 1.44 0.151
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) lbl_3dr lbl_3- lbl_3rt
lbl_3drftrt -0.957
lbl_3nn-dcs -0.932 0.892
label_3rt -0.910 0.871 0.848
lbl_3thrshl -0.932 0.892 0.868 0.848
# Subset of bootstrap results for the variables shown on the poster figure,
# with prettified labels and the point estimates joined in for overlay.
poster_vars <- read.csv('/Users/zeynepenkavi/Dropbox/PoldrackLab/SRO_Retest_Analyses/input/poster_var_list.csv', header=F)
names(poster_vars) = "var"
boot_df_poster = boot_df[boot_df$dv %in% poster_vars$var,]
boot_df_poster = boot_df_poster%>%
separate(dv, c("task_group", "var"), sep="\\.",remove=FALSE,extra="merge") %>%
mutate(task_group = factor(task_group, levels = task_group[order(task)])) %>%
# Keep only the first dot-separated token of the variable name.
separate(var, c("var"), sep="\\.",remove=TRUE,extra="drop") %>%
mutate(task_group = gsub("_", " ", task_group),
var = gsub("_", " ", var)) %>%
arrange(task_group, var)
# icc.x = bootstrap draw, icc.y = full-sample point estimate after the join.
boot_df_poster = boot_df_poster %>%
left_join(rel_df[,c("dv", "icc")], by = "dv") %>%
rename(icc = icc.x, point_est = icc.y)
#Manual correction
boot_df_poster = boot_df_poster %>%
# holt laury contains "survey" in its name but is a task; also shorten names.
mutate(task = ifelse(task_group == 'holt laury survey', "task", task)) %>%
mutate(task_group = ifelse(task_group == "psychological refractory period two choices", "psychological refractory period", ifelse(task_group == "angling risk task always sunny", "angling risk task",task_group))) %>%
mutate(task_group = gsub("survey", "", task_group))
# Poster panel: bootstrapped ICC distributions (teal) and point-estimate
# ICCs (black) for each task measure, faceted by task.
# FIX: removed a trailing scale_shape_manual() layer -- no `shape` aesthetic
# is mapped anywhere in this plot, so the scale had no effect (it also passed
# the entire `type` column as `breaks` against only 3 `values`).
p4 <- boot_df_poster %>%
  filter(task == 'task') %>%
  ggplot(aes(y = var, x = icc)) +
  geom_point(color = '#00BFC4') +
  geom_point(aes(y = var, x = point_est), color = "black") +
  facet_grid(task_group ~ ., switch = "y", scales = "free_y",
             space = "free_y", labeller = label_wrap_gen(width = 20)) +
  theme(panel.spacing = unit(0.75, "lines"),
        strip.placement = "outside",
        strip.text.y = element_text(angle = 180, size = 36),
        axis.text.y = element_text(size = 20),
        panel.background = element_rect(fill = NA),
        panel.grid.major = element_line(colour = "grey80")) +
  xlab("") +
  ylab("") +
  scale_x_continuous(limits = c(-0.25, 1),
                     breaks = c(-0.25, 0, 0.25, 0.5, 0.75, 1)) +
  scale_y_discrete(labels = function(x) str_wrap(x, width = 10)) +
  geom_vline(xintercept = 0, color = "red", size = 1)
# Poster panel: bootstrapped ICC distributions (salmon) and point-estimate
# ICCs (black) for each survey measure, faceted by survey.
# FIX: removed a trailing scale_shape_manual() layer -- no `shape` aesthetic
# is mapped anywhere in this plot, so the scale had no effect (it also passed
# the entire `type` column as `breaks` against only 3 `values`).
p5 <- boot_df_poster %>%
  filter(task == 'survey') %>%
  ggplot(aes(y = var, x = icc)) +
  geom_point(color = '#F8766D') +
  geom_point(aes(y = var, x = point_est), color = "black") +
  facet_grid(task_group ~ ., switch = "y", scales = "free_y",
             space = "free_y", labeller = label_wrap_gen(width = 20)) +
  theme(panel.spacing = unit(0.75, "lines"),
        strip.placement = "outside",
        strip.text.y = element_text(angle = 180, size = 36),
        axis.text.y = element_text(size = 20),
        panel.background = element_rect(fill = NA),
        panel.grid.major = element_line(colour = "grey80")) +
  xlab("") +
  ylab("") +
  scale_x_continuous(limits = c(-0.25, 1),
                     breaks = c(-0.25, 0, 0.25, 0.5, 0.75, 1)) +
  scale_y_discrete(labels = function(x) str_wrap(x, width = 10)) +
  geom_vline(xintercept = 0, color = "red", size = 1)
# Combine the task (p4) and survey (p5) panels side by side and save at
# poster size (limitsize = FALSE because the figure exceeds 50 inches total).
p6 <- arrangeGrob(p4, p5,nrow=1)
ggsave('Bootstrap_Poster_Plot.jpg', plot = p6, device = "jpeg", path = "/Users/zeynepenkavi/Dropbox/PoldrackLab/SRO_Retest_Analyses/output/figures/", width = 24, height = 42, units = "in", limitsize = FALSE)
# Compact poster alternative: one violin per task summarizing its
# bootstrapped ICC distribution, flipped so tasks read top to bottom.
p4_t <- boot_df_poster %>%
  filter(task == "task") %>%
  ggplot(aes(x = factor(task_group, levels = rev(unique(task_group))),
             y = icc)) +
  geom_violin(fill = "#00BFC4") +
  # hline is added after the violins so it is drawn on top of them
  geom_hline(yintercept = 0, color = "red", size = 1) +
  scale_y_continuous(limits = c(-0.25, 1),
                     breaks = c(-0.25, 0, 0.25, 0.5, 0.75, 1)) +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 15)) +
  xlab("") +
  ylab("") +
  theme_bw() +
  theme(axis.text.y = element_text(size = 30)) +
  coord_flip()
# Survey counterpart of p4_t: one violin per survey's bootstrapped ICCs.
p5_t <- boot_df_poster %>%
  filter(task == "survey") %>%
  ggplot(aes(x = factor(task_group, levels = rev(unique(task_group))),
             y = icc)) +
  geom_violin(fill = "#F8766D") +
  # hline is added after the violins so it is drawn on top of them
  geom_hline(yintercept = 0, color = "red", size = 1) +
  scale_y_continuous(limits = c(-0.25, 1),
                     breaks = c(-0.25, 0, 0.25, 0.5, 0.75, 1)) +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 10)) +
  xlab("") +
  ylab("") +
  theme_bw() +
  theme(axis.text.y = element_text(size = 43)) +
  coord_flip()
# Arrange the two violin panels side by side for the poster.
p6_t <- arrangeGrob(p4_t, p5_t, nrow = 1)
Warning: Removed 431 rows containing non-finite values (stat_ydensity).
# NOTE(review): unlike the other ggsave() calls in this file, this one uses a
# relative path ("../output/figures/") -- confirm the intended working
# directory, or make it consistent with the absolute Dropbox paths above.
ggsave('Bootstrap_Poster_Plot_t.jpg', plot = p6_t, device = "jpeg", path = "../output/figures/", width = 27, height = 48, units = "in", limitsize = FALSE)
# Stacked-bar breakdown of variance components (percent of total) for each
# non-DDM task measure: between-subject, between-session, and residual.
p1 = rel_df %>%
mutate(var_subs_pct = var_subs/(var_subs+var_ind+var_resid)*100,
var_ind_pct = var_ind/(var_subs+var_ind+var_resid)*100,
var_resid_pct = var_resid/(var_subs+var_ind+var_resid)*100) %>%
select(dv, task, var_subs_pct, var_ind_pct, var_resid_pct) %>%
gather(key, value, -dv, -task) %>%
mutate(key = factor(key, levels = c("var_subs_pct", "var_ind_pct", "var_resid_pct")),
dv = factor(dv, levels = dv[order(task)])) %>%
separate(dv, c("task_group", "var"), sep="\\.",remove=FALSE,extra="merge") %>%
mutate(task_group = factor(task_group, levels = task_group[order(task)])) %>%
# separate(var, c("var"), sep="\\.",remove=TRUE,extra="drop") %>%
mutate(task_group = gsub("_", " ", task_group),
var = gsub("_", " ", var),
# NOTE(review): the leading "." is an unescaped regex metacharacter here, so
# these also match e.g. "xReflogTr"; probably intended as a literal
# ".ReflogTr"/".logTr" suffix (fixed = TRUE would be safer) -- confirm.
var=gsub(".ReflogTr", "", var),
var=gsub(".logTr", "", var)) %>%
# Same display clean-up as the poster data frame above.
mutate(task_group = ifelse(task_group == "psychological refractory period two choices", "psychological refractory period", ifelse(task_group == "angling risk task always sunny", "angling risk task",task_group))) %>%
mutate(task_group = gsub("survey", "", task_group)) %>%
# Keep only task measures and drop DDM-derived variables (plotted separately
# as p1_ddm below).
filter(task=="task",
!grepl("EZ|hddm", dv))%>%
ggplot(aes(x=var, y=value, fill=key))+
geom_bar(stat='identity', alpha = 0.75)+
coord_flip()+
facet_grid(task_group~., switch = "y", scales = "free_y", space = "free_y") +
theme(panel.spacing = unit(0.5, "lines"),
strip.placement = "outside",
strip.text.y = element_text(angle=180),
panel.background = element_rect(fill = NA),
panel.grid.major = element_line(colour = "grey80"),
legend.position = 'bottom')+
#theme_bw()+
theme(legend.title = element_blank())+
# NOTE(review): h= is forwarded via ... to the hue scale; confirm the
# installed ggplot2 version still accepts it through scale_fill_discrete().
scale_fill_discrete(breaks = c("var_subs_pct", "var_ind_pct", "var_resid_pct"),
labels = c("Variance between individuals",
"Variance between sessions",
"Error variance"),
h=c(100,500))+
ylab("")+
xlab("")
# Same variance-component breakdown as p1, but for survey measures.
p2 = rel_df %>%
mutate(var_subs_pct = var_subs/(var_subs+var_ind+var_resid)*100,
var_ind_pct = var_ind/(var_subs+var_ind+var_resid)*100,
var_resid_pct = var_resid/(var_subs+var_ind+var_resid)*100) %>%
select(dv, task, var_subs_pct, var_ind_pct, var_resid_pct) %>%
gather(key, value, -dv, -task) %>%
mutate(key = factor(key, levels = c("var_subs_pct", "var_ind_pct", "var_resid_pct")),
dv = factor(dv, levels = dv[order(task)])) %>%
separate(dv, c("task_group", "var"), sep="\\.",remove=FALSE,extra="merge") %>%
mutate(task_group = factor(task_group, levels = task_group[order(task)])) %>%
separate(var, c("var"), sep="\\.",remove=TRUE,extra="drop") %>%
mutate(task_group = gsub("_", " ", task_group),
var = gsub("_", " ", var)) %>%
# Same display clean-up as the poster data frame above.
mutate(task_group = ifelse(task_group == "psychological refractory period two choices", "psychological refractory period", ifelse(task_group == "angling risk task always sunny", "angling risk task",task_group))) %>%
mutate(task_group = gsub("survey", "", task_group)) %>%
filter(task=="survey")%>%
ggplot(aes(x=var, y=value, fill=key))+
geom_bar(stat='identity', alpha = 0.75)+
coord_flip()+
facet_grid(task_group~., switch = "y", scales = "free_y", space = "free_y") +
theme(panel.spacing = unit(0.5, "lines"),
strip.placement = "outside",
strip.text.y = element_text(angle=180),
panel.background = element_rect(fill = NA),
panel.grid.major = element_line(colour = "grey80"),
legend.position = 'bottom')+
#theme_bw()+
theme(legend.title = element_blank())+
# NOTE(review): h= is forwarded via ... to the hue scale; confirm the
# installed ggplot2 version still accepts it through scale_fill_discrete().
scale_fill_discrete(breaks = c("var_subs_pct", "var_ind_pct", "var_resid_pct"),
labels = c("Variance between individuals",
"Variance between sessions",
"Error variance"),
h=c(100,500))+
ylab("")+
xlab("")
# Extract the legend ("guide-box") grob from a ggplot so it can be placed
# separately (e.g. shared between panels in arrangeGrob()).
#
# a.gplot: a ggplot object that has a visible legend.
# Returns: the legend grob.
#
# FIX: sapply() replaced with type-stable vapply(); if the plot has no
# legend this now stops with a clear message instead of an opaque
# zero-length-subscript error, and if several guide boxes exist the first
# is used explicitly.
g_legend <- function(a.gplot) {
  built <- ggplot_gtable(ggplot_build(a.gplot))
  grob_names <- vapply(built$grobs, function(g) g$name, character(1))
  leg <- which(grob_names == "guide-box")
  if (length(leg) == 0) {
    stop("g_legend: plot has no legend (no 'guide-box' grob found)",
         call. = FALSE)
  }
  built$grobs[[leg[1]]]
}
# Pull the (identical) legend out of p1 so a single shared legend can be
# placed under both panels.
mylegend<-g_legend(p1)
# Top row: task (p1) and survey (p2) panels without their own legends;
# bottom row: the shared legend, given 1/11 of the height.
p3 <- arrangeGrob(arrangeGrob(p1 +theme(legend.position="none"),
p2 + theme(legend.position="none"),
nrow=1),
mylegend, nrow=2,heights=c(10, 1))
ggsave('Variance_Breakdown_Plot.jpg', plot = p3, device = "jpeg", path = "/Users/zeynepenkavi/Dropbox/PoldrackLab/SRO_Retest_Analyses/output/figures", width = 24, height = 20, units = "in")
Comparing types of variance for survey vs. task measures: survey measures show higher between-subject variability.
Note: This analysis includes DDM variables too.
# Long-format variance decomposition per DV: percent of total variance
# attributable to subjects (var_subs), sessions (var_ind), and residual error.
# NOTE(review): gather() is superseded by pivot_longer(); kept as-is because
# the two produce different row orders and tmp is reused below.
tmp = rel_df %>%
mutate(var_subs_pct = var_subs/(var_subs+var_ind+var_resid)*100,
var_ind_pct = var_ind/(var_subs+var_ind+var_resid)*100,
var_resid_pct = var_resid/(var_subs+var_ind+var_resid)*100) %>%
select(dv, task, var_subs_pct, var_ind_pct, var_resid_pct) %>%
gather(key, value, -dv, -task)
# do(tidy(lm(value~key*task, data=.)))
# Does the variance composition differ between tasks and surveys?
# Output follows below.
summary(lm(value~key*task,tmp))
Call:
lm(formula = value ~ key * task, data = tmp)
Residuals:
Min 1Q Median 3Q Max
-42.60 -11.33 -1.62 10.78 60.78
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 10.693 2.079 5.14 3.2e-07 ***
keyvar_resid_pct -0.757 2.940 -0.26 0.80
keyvar_subs_pct 68.678 2.940 23.36 < 2e-16 ***
tasktask 17.481 2.324 7.52 1.2e-13 ***
keyvar_resid_pct:tasktask -3.369 3.286 -1.03 0.31
keyvar_subs_pct:tasktask -49.074 3.286 -14.93 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 17.3 on 1032 degrees of freedom
Multiple R-squared: 0.5, Adjusted R-squared: 0.498
F-statistic: 207 on 5 and 1032 DF, p-value: <2e-16
# Mean percent of each variance component by measure type (task vs survey).
aggregate(value ~task+key, FUN=mean, data=tmp)
Summarizing for clearer presentation
# Boxplots summarizing the variance-component percentages for surveys vs
# tasks (the clearer-presentation version of the bar charts above).
# FIX: removed a preceding group_by(task, key) -- dplyr grouping is ignored
# by ggplot(), so it had no effect on the plot.
tmp %>%
ggplot(aes(task, value, fill=key))+
geom_boxplot()+
theme_bw()+
theme(legend.title = element_blank())+
# NOTE(review): h= is forwarded via ... to the hue scale; confirm the
# installed ggplot2 version still accepts it through scale_fill_discrete().
scale_fill_discrete(breaks = c("var_subs_pct", "var_ind_pct", "var_resid_pct"),
labels = c("Variance between individuals",
"Variance between sessions",
"Error variance"),
h=c(100,500))+
xlab("")+
ylab("Percent")
# No plot= argument: ggsave() saves the most recently displayed plot, i.e.
# the boxplot rendered just above.
ggsave('Variance_Breakdown_Plot_Summary.jpg', device = "jpeg", path = "/Users/zeynepenkavi/Dropbox/PoldrackLab/SRO_Retest_Analyses/output/figures", width = 6, height = 5, units = "in")
What does it look like for DDM variables separately?
# Same variance-component breakdown as p1, restricted to the DDM-derived
# variables (EZ and HDDM parameters) that p1 excluded.
p1_ddm = rel_df %>%
mutate(var_subs_pct = var_subs/(var_subs+var_ind+var_resid)*100,
var_ind_pct = var_ind/(var_subs+var_ind+var_resid)*100,
var_resid_pct = var_resid/(var_subs+var_ind+var_resid)*100) %>%
select(dv, task, var_subs_pct, var_ind_pct, var_resid_pct) %>%
gather(key, value, -dv, -task) %>%
mutate(key = factor(key, levels = c("var_subs_pct", "var_ind_pct", "var_resid_pct")),
dv = factor(dv, levels = dv[order(task)])) %>%
separate(dv, c("task_group", "var"), sep="\\.",remove=FALSE,extra="merge") %>%
mutate(task_group = factor(task_group, levels = task_group[order(task)])) %>%
# separate(var, c("var"), sep="\\.",remove=TRUE,extra="drop") %>%
mutate(task_group = gsub("_", " ", task_group),
var = gsub("_", " ", var),
# NOTE(review): leading "." is an unescaped regex metacharacter (matches any
# character); probably intended as a literal suffix -- confirm (see p1).
var=gsub(".ReflogTr", "", var),
var=gsub(".logTr", "", var)) %>%
mutate(task_group = ifelse(task_group == "psychological refractory period two choices", "psychological refractory period", ifelse(task_group == "angling risk task always sunny", "angling risk task",task_group))) %>%
mutate(task_group = gsub("survey", "", task_group)) %>%
# Keep only the DDM variables this time.
filter(task=="task",
grepl("EZ|hddm", dv))%>%
ggplot(aes(x=var, y=value, fill=key))+
geom_bar(stat='identity', alpha = 0.75)+
coord_flip()+
facet_grid(task_group~., switch = "y", scales = "free_y", space = "free_y") +
theme(panel.spacing = unit(0.5, "lines"),
strip.placement = "outside",
strip.text.y = element_text(angle=180),
panel.background = element_rect(fill = NA),
panel.grid.major = element_line(colour = "grey80"),
legend.position = 'bottom')+
#theme_bw()+
theme(legend.title = element_blank())+
scale_fill_discrete(breaks = c("var_subs_pct", "var_ind_pct", "var_resid_pct"),
labels = c("Variance between individuals",
"Variance between sessions",
"Error variance"),
h=c(100,500))+
ylab("")+
xlab("")
ggsave('Variance_Breakdown_Plot_DDM.jpg', plot = p1_ddm, device = "jpeg", path = "/Users/zeynepenkavi/Dropbox/PoldrackLab/SRO_Retest_Analyses/output/figures", width = 12, height = 20, units = "in")